This page examines the trend in the number of COVID-19 cases in the US.
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## 载入程辑包:'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(rvest)
##
## 载入程辑包:'rvest'
## The following object is masked from 'package:readr':
##
## guess_encoding
library(ggplot2)
library(lubridate)
##
## 载入程辑包:'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
Import data
# Cumulative case counts. `skip = 2` skips the first two lines of the file
# before the header is read; clean_names() then rewrites the column names in
# snake_case (e.g. "Total Cases" becomes `total_cases`).
covid_cum <- janitor::clean_names(
  read_csv("data/covid_cumulative_cases.csv", skip = 2)
)
## Rows: 657 Columns: 3
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): State, Date
## dbl (1): Total Cases
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning in FUN(X[[i]], ...): strings not representable in native encoding will
## be translated to UTF-8
## Warning in FUN(X[[i]], ...): unable to translate '<U+00C4>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00D6>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00E4>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00F6>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00DF>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00C6>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00E6>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00D8>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00F8>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00C5>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00E5>' to native encoding
# Daily case counts. `skip = 2` skips the first two lines of the file before
# the header is read; clean_names() then rewrites the column names in
# snake_case (e.g. "New Cases" becomes `new_cases`).
covid_day <- janitor::clean_names(
  read_csv("data/covid_daily_cases.csv", skip = 2)
)
## Rows: 656 Columns: 5
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): State, Date
## dbl (3): New Cases, 7-Day Moving Avg, Historic Cases
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# One row per day with the daily new cases and the cumulative total.
#
# The text `date` column is split into `month`, `day` and `year` (in that
# order) and a real Date is rebuilt from the pieces. `month` becomes a factor
# over `month.abb`, so its integer codes are the calendar month numbers.
# NOTE(review): assumes dates look like "Jan 23 2020" (abbreviated English
# month first) -- confirm against the CSV.
#
# `year` and `day` stay character, `month` stays a factor, and the result is
# returned sorted by date and grouped by (year, month), so downstream
# summaries can rely on that grouping.
covid_daily <- covid_day %>%
  left_join(covid_cum, by = "date") %>%
  select(date, new_cases, total_cases) %>%
  separate(date, into = c("month", "day", "year")) %>%
  mutate(
    month = factor(month, levels = month.abb),
    # Convert explicitly: `year`/`day` are character and `month` is a factor,
    # so make the integer coercion visible instead of relying on make_date().
    date = make_date(as.integer(year), as.integer(month), as.integer(day))
  ) %>%
  # Sort once, on the real Date. Sorting on the character `day` column would
  # be lexicographic ("10" before "2").
  arrange(date) %>%
  group_by(year, month)
# Total new cases per calendar month.
#
# The grouping is stated here rather than inherited from `covid_daily`, so the
# result stays one row per (year, month) even if the input arrives ungrouped.
# `.groups = "drop_last"` keeps the output grouped by `year` and avoids the
# informational message summarise() prints when `.groups` is not given.
covid_monthly <- covid_daily %>%
  group_by(year, month) %>%
  summarize(monthly = sum(new_cases), .groups = "drop_last")
## `summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
# Quarterly totals of new cases, with a "<year> - <quarter>" label in `date`.
covid_seasonal <- covid_monthly %>%
  mutate(
    # Build the month -> quarter lookup ("Jan" = "Q1", ..., "Dec" = "Q4") from
    # month.abb and splice it into recode() as named arguments.
    quarter = recode(
      month,
      !!!setNames(rep(c("Q1", "Q2", "Q3", "Q4"), each = 3), month.abb)
    )
  ) %>%
  group_by(year, quarter) %>%
  summarize(quarterly = sum(monthly)) %>%
  mutate(date = paste(year, quarter, sep = " - "))
## `summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
Plots Daily
# Daily new cases (bars, left axis) overlaid with cumulative cases (line,
# right axis).
#
# `covid_daily` is a grouped tibble, and plotly honours dplyr groups for line
# traces by drawing one disconnected segment per group. Ungroup first so the
# cumulative curve is a single continuous line instead of breaking at each
# group boundary.
daily_fig <- plot_ly(ungroup(covid_daily))
daily_fig %>%
  add_trace(
    x = ~date, y = ~new_cases,
    type = "bar", yaxis = "y", name = "new"
  ) %>%
  add_trace(
    x = ~date, y = ~total_cases,
    type = "scatter", mode = "lines", yaxis = "y2", name = "cumulative"
  ) %>%
  layout(
    yaxis = list(title = "daily new cases", side = "left"),
    # The second axis is drawn on the right, on top of the first one.
    yaxis2 = list(title = "cumulative cases", side = "right", overlaying = "y"),
    showlegend = TRUE
  )
Quarterly (to correspond with the consumption data)
# Bar chart of quarterly new-case totals, one bar per `date` label.
plot_ly(covid_seasonal, x = ~date, y = ~quarterly, type = "bar")